library(dplyr)
package 㤼㸱dplyr㤼㸲 was built under R version 4.0.4
Attaching package: 㤼㸱dplyr㤼㸲

The following objects are masked from 㤼㸱package:stats㤼㸲:

    filter, lag

The following objects are masked from 㤼㸱package:base㤼㸲:

    intersect, setdiff, setequal, union
library(ggplot2)
package 㤼㸱ggplot2㤼㸲 was built under R version 4.0.4
library(tidyr)
package 㤼㸱tidyr㤼㸲 was built under R version 4.0.3
library(plotly)
package 㤼㸱plotly㤼㸲 was built under R version 4.0.5Registered S3 method overwritten by 'data.table':
  method           from
  print.data.table     
Registered S3 methods overwritten by 'htmltools':
  method               from         
  print.html           tools:rstudio
  print.shiny.tag      tools:rstudio
  print.shiny.tag.list tools:rstudio
Registered S3 method overwritten by 'htmlwidgets':
  method           from         
  print.htmlwidget tools:rstudio

Attaching package: 㤼㸱plotly㤼㸲

The following object is masked from 㤼㸱package:ggplot2㤼㸲:

    last_plot

The following object is masked from 㤼㸱package:stats㤼㸲:

    filter

The following object is masked from 㤼㸱package:graphics㤼㸲:

    layout

Introduction

We have been given the task of exploring time series analysis as part of a hackathon. We have been given access to 3 datasets:

  1. Data on COVID-19 (coronavirus) by Our World in Data

1. Data on COVID-19 (coronavirus) by Our World in Data

This data can be read in directly from GitHub using {readr}. The col_types have to be specified up front otherwise it doesn’t load correctly.

# Read in COVID data from GitHub with correct types
covid_df <- readr::read_csv(
  file = "https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/owid-covid-data.csv",
  col_types = "cccDdddddddddddddddddddddddddddddcddddddddddddddddddddddddddddddddd"
)

# Inspect it
head(covid_df)

There are many columns and groups but we are going to focus on United Kingdom with a subset of columns.

# Filter and subset columns
uk_covid_df <- covid_df %>% 
  filter(location == "United Kingdom") %>%
  # Rename some columns so they are either {col} or cumulative_{col}
  select(
    date, 
    cases = new_cases,
    cumulative_cases = total_cases,
    deaths = new_deaths, 
    cumulative_deaths = total_deaths, 
    excess_mortality, 
    cumulative_excess_mortality = excess_mortality_cumulative, 
    cumulative_excess_mortality_absolute = excess_mortality_cumulative_absolute)

# Inspect it
head(uk_covid_df)

Check the date range and also some quick plots of the {col}s and cumulative_{col}s.

# Print off a summary of the dates
print(paste("Start date of cases:",uk_covid_df %>% summarise(min(date)) %>% pull()))
[1] "Start date of cases: 2020-01-31"
print(paste("Start date of deaths:", uk_covid_df %>% filter(cumulative_deaths > 0) %>% summarise(min(date)) %>% pull()))
[1] "Start date of deaths: 2020-03-06"
print(paste("End date of cases:",uk_covid_df %>% summarise(max(date)) %>% pull()))
[1] "End date of cases: 2021-11-16"
print(paste("End date of deaths:", uk_covid_df %>% filter(cumulative_deaths > 0) %>% summarise(max(date)) %>% pull()))
[1] "End date of deaths: 2021-11-16"
# Chart the non-cumulative cols (not sure how you can have negative deaths)
p <- uk_covid_df %>% 
  select(-starts_with("cumulative_")) %>%
  pivot_longer(cols = -date, names_to = "measure", values_to = "count") %>%
  ggplot(aes(x = date, y = count)) +
  geom_line() +
  facet_wrap(facets = ~measure, ncol = 1, scales = "free_y")

ggplotly(p)
# Chart the cumulative cols (not sure how you can have negative deaths)
p <- uk_covid_df %>% 
  select(date, starts_with("cumulative_")) %>%
  pivot_longer(cols = -date, names_to = "measure", values_to = "count") %>%
  ggplot(aes(x = date, y = count, group = measure)) +
  geom_line() +
  facet_wrap(facets = ~measure, ncol = 1, scales = "free_y")

ggplotly(p)
LS0tDQp0aXRsZTogIkhhY2thdGhvbiBOb3ZlbWJlciAyMDIxIC0gR3JvdXAgMiINCm91dHB1dDogaHRtbF9ub3RlYm9vaw0KLS0tDQoNCmBgYHtyfQ0KbGlicmFyeShkcGx5cikNCmxpYnJhcnkoZ2dwbG90MikNCmxpYnJhcnkodGlkeXIpDQpsaWJyYXJ5KHBsb3RseSkNCmBgYA0KDQojIEludHJvZHVjdGlvbg0KDQpXZSBoYXZlIGJlZW4gZ2l2ZW4gdGhlIHRhc2sgb2YgZXhwbG9yaW5nIHRpbWUgc2VyaWVzIGFuYWx5c2lzIGFzIHBhcnQgb2YgYSBoYWNrYXRob24uIFdlIGhhdmUgYmVlbiBnaXZlbiBhY2Nlc3MgdG8gMyBkYXRhc2V0czoNCg0KMS4gW0RhdGEgb24gQ09WSUQtMTkgKGNvcm9uYXZpcnVzKSBieSBPdXIgV29ybGQgaW4gRGF0YV0oaHR0cHM6Ly9naXRodWIuY29tL293aWQvY292aWQtMTktZGF0YS90cmVlL21hc3Rlci9wdWJsaWMvZGF0YSkNCjIuDQozLg0KDQojIyAxLiBEYXRhIG9uIENPVklELTE5IChjb3JvbmF2aXJ1cykgYnkgT3VyIFdvcmxkIGluIERhdGENCg0KVGhpcyBkYXRhIGNhbiBiZSByZWFkIGluIGRpcmVjdGx5IGZyb20gR2l0SHViIHVzaW5nIGB7cmVhZHJ9YC4gVGhlIGBjb2xfdHlwZXNgIGhhdmUgdG8gYmUgc3BlY2lmaWVkIHVwIGZyb250IG90aGVyd2lzZSBpdCBkb2Vzbid0IGxvYWQgY29ycmVjdGx5Lg0KDQpgYGB7cn0NCiMgUmVhZCBpbiBDT1ZJRCBkYXRhIGZyb20gR2l0SHViIHdpdGggY29ycmVjdCB0eXBlcw0KY292aWRfZGYgPC0gcmVhZHI6OnJlYWRfY3N2KA0KICBmaWxlID0gImh0dHBzOi8vcmF3LmdpdGh1YnVzZXJjb250ZW50LmNvbS9vd2lkL2NvdmlkLTE5LWRhdGEvbWFzdGVyL3B1YmxpYy9kYXRhL293aWQtY292aWQtZGF0YS5jc3YiLA0KICBjb2xfdHlwZXMgPSAiY2NjRGRkZGRkZGRkZGRkZGRkZGRkZGRkZGRkZGRkZGRkY2RkZGRkZGRkZGRkZGRkZGRkZGRkZGRkZGRkZGRkZGRkZCINCikNCg0KIyBJbnNwZWN0IGl0DQpoZWFkKGNvdmlkX2RmKQ0KYGBgDQoNClRoZXJlIGFyZSBtYW55IGNvbHVtbnMgYW5kIGdyb3VwcyBidXQgd2UgYXJlIGdvaW5nIHRvIGZvY3VzIG9uIFVuaXRlZCBLaW5nZG9tIHdpdGggYSBzdWJzZXQgb2YgY29sdW1ucy4NCg0KYGBge3J9DQojIEZpbHRlciBhbmQgc3Vic2V0IGNvbHVtbnMNCnVrX2NvdmlkX2RmIDwtIGNvdmlkX2RmICU+JSANCiAgZmlsdGVyKGxvY2F0aW9uID09ICJVbml0ZWQgS2luZ2RvbSIpICU+JQ0KICAjIFJlbmFtZSBzb21lIGNvbHVtbnMgc28gdGhleSBhcmUgZWl0aGVyIHtjb2x9IG9yIGN1bXVsYXRpdmVfe2NvbH0NCiAgc2VsZWN0KA0KICAgIGRhdGUsIA0KICAgIGNhc2VzID0gbmV3X2Nhc2VzLA0KICAgIGN1bXVsYXRpdmVfY2FzZXMgPSB0b3RhbF9jYXNlcywNCiAgICBkZWF0aHMgPSBuZXdfZGVhdGhzLCANCiAgICBjdW11bGF0aXZlX2RlYXRocyA9IHRvdGFsX2RlYXRocywgDQogICAgZXhjZXNzX21vcnRhbGl0eSwgDQogICAgY3VtdWxhdGl2ZV9leGNlc3NfbW9ydGFsaXR5ID0gZXhjZXNzX21vcnRhbGl0eV9jdW11bGF0aXZlLCANCiAgICBjdW11bGF0aXZlX2V4Y2Vzc19tb3J0YWxpdHlfYWJzb2x1dGUgPSBleGNlc3NfbW9ydGFsaXR5X2N1bXVsYXRpdmVfYWJzb2x1dGUpDQoNCiMgSW5zcGVjdCBpdA0KaGVhZCh1a19jb3ZpZF9kZikNCmBgYA0KDQpDaGVjayB0aGUgZGF0ZSByYW5nZSBhbmQgYWxzbyBzb21lIHF1aWNrIHBsb3RzIG9mIHRoZSB7Y29sfXMgYW5kIGN1bXVsYXRpdmVfe2NvbH1zLg0KDQpgYGB7cn0NCiMgUHJpbnQgb2ZmIGEgc3VtbWFyeSBvZiB0aGUgZGF0ZXMNCnByaW50KHBhc3RlKCJTdGFydCBkYXRlIG9mIGNhc2VzOiIsdWtfY292aWRfZGYgJT4lIHN1bW1hcmlzZShtaW4oZGF0ZSkpICU+JSBwdWxsKCkpKQ0KcHJpbnQocGFzdGUoIlN0YXJ0IGRhdGUgb2YgZGVhdGhzOiIsIHVrX2NvdmlkX2RmICU+JSBmaWx0ZXIoY3VtdWxhdGl2ZV9kZWF0aHMgPiAwKSAlPiUgc3VtbWFyaXNlKG1pbihkYXRlKSkgJT4lIHB1bGwoKSkpDQpwcmludChwYXN0ZSgiRW5kIGRhdGUgb2YgY2FzZXM6Iix1a19jb3ZpZF9kZiAlPiUgc3VtbWFyaXNlKG1heChkYXRlKSkgJT4lIHB1bGwoKSkpDQpwcmludChwYXN0ZSgiRW5kIGRhdGUgb2YgZGVhdGhzOiIsIHVrX2NvdmlkX2RmICU+JSBmaWx0ZXIoY3VtdWxhdGl2ZV9kZWF0aHMgPiAwKSAlPiUgc3VtbWFyaXNlKG1heChkYXRlKSkgJT4lIHB1bGwoKSkpDQpgYGANCmBgYHtyfQ0KIyBDaGFydCB0aGUgbm9uLWN1bXVsYXRpdmUgY29scyAobm90IHN1cmUgaG93IHlvdSBjYW4gaGF2ZSBuZWdhdGl2ZSBkZWF0aHMpDQpwIDwtIHVrX2NvdmlkX2RmICU+JSANCiAgc2VsZWN0KC1zdGFydHNfd2l0aCgiY3VtdWxhdGl2ZV8iKSkgJT4lDQogIHBpdm90X2xvbmdlcihjb2xzID0gLWRhdGUsIG5hbWVzX3RvID0gIm1lYXN1cmUiLCB2YWx1ZXNfdG8gPSAiY291bnQiKSAlPiUNCiAgZ2dwbG90KGFlcyh4ID0gZGF0ZSwgeSA9IGNvdW50KSkgKw0KICBnZW9tX2xpbmUoKSArDQogIGZhY2V0X3dyYXAoZmFjZXRzID0gfm1lYXN1cmUsIG5jb2wgPSAxLCBzY2FsZXMgPSAiZnJlZV95IikNCg0KZ2dwbG90bHkocCkNCmBgYA0KYGBge3J9DQojIENoYXJ0IHRoZSBjdW11bGF0aXZlIGNvbHMgKG5vdCBzdXJlIGhvdyB5b3UgY2FuIGhhdmUgbmVnYXRpdmUgZGVhdGhzKQ0KcCA8LSB1a19jb3ZpZF9kZiAlPiUgDQogIHNlbGVjdChkYXRlLCBzdGFydHNfd2l0aCgiY3VtdWxhdGl2ZV8iKSkgJT4lDQogIHBpdm90X2xvbmdlcihjb2xzID0gLWRhdGUsIG5hbWVzX3RvID0gIm1lYXN1cmUiLCB2YWx1ZXNfdG8gPSAiY291bnQiKSAlPiUNCiAgZ2dwbG90KGFlcyh4ID0gZGF0ZSwgeSA9IGNvdW50LCBncm91cCA9IG1lYXN1cmUpKSArDQogIGdlb21fbGluZSgpICsNCiAgZmFjZXRfd3JhcChmYWNldHMgPSB+bWVhc3VyZSwgbmNvbCA9IDEsIHNjYWxlcyA9ICJmcmVlX3kiKQ0KDQpnZ3Bsb3RseShwKQ0KYGBg